{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 近似求导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7.999999999785956\n"
     ]
    }
   ],
   "source": [
    "def f(x):\n",
    "    return 3. * x ** 2 + 2. * x - 1\n",
    "#近视求导，x移动eps单位，也就是离自己很近的一个点的切线 符号求导是6x+2\n",
    "def approximate_derivative(f, x, eps=1e-6):\n",
    "    return (f(x + eps) - f(x - eps)) / (2. * eps)\n",
    "\n",
    "print(approximate_derivative(f, 1.))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def g(x1, x2):\n",
    "    return (x1 + 5) * (x2 ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "在点(1, 2)处，g对x1的偏导数为: 4.000000000559112\n",
      "在点(1, 2)处，g对x2的偏导数为: 24.000000001578314\n"
     ]
    }
   ],
   "source": [
    "# 对多变量函数求偏导数\n",
    "def partial_derivative(f, x1, x2, var, eps=1e-6):\n",
    "    \"\"\"\n",
    "    计算多变量函数的偏导数\n",
    "    \n",
    "    参数:\n",
    "        f: 多变量函数\n",
    "        x1, x2: 计算偏导数的点坐标\n",
    "        var: 对哪个变量求偏导，'x1'或'x2'\n",
    "        eps: 微小变化量\n",
    "    \n",
    "    返回:\n",
    "        指定变量的偏导数值\n",
    "    \"\"\"\n",
    "    if var == 'x1':\n",
    "        return (f(x1 + eps, x2) - f(x1 - eps, x2)) / (2 * eps)\n",
    "    elif var == 'x2':\n",
    "        return (f(x1, x2 + eps) - f(x1, x2 - eps)) / (2 * eps)\n",
    "    else:\n",
    "        raise ValueError(\"var参数必须是'x1'或'x2'\")\n",
    "\n",
    "# 计算g函数在点(1, 2)处对x1和x2的偏导数\n",
    "x1_point, x2_point = 1, 2\n",
    "\n",
    "# 对x1求偏导数：∂g/∂x1 = x2^2\n",
    "derivative_x1 = partial_derivative(g, x1_point, x2_point, 'x1')\n",
    "print(f\"在点({x1_point}, {x2_point})处，g对x1的偏导数为: {derivative_x1}\")\n",
    "# 理论值为 x2^2 = 2^2 = 4\n",
    "\n",
    "# 对x2求偏导数：∂g/∂x2 = 2(x1+5)x2\n",
    "derivative_x2 = partial_derivative(g, x1_point, x2_point, 'x2')\n",
    "print(f\"在点({x1_point}, {x2_point})处，g对x2的偏导数为: {derivative_x2}\")\n",
    "# 理论值为 2(x1+5)x2 = 2(1+5)*2 = 24\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8.999999999993236, 41.999999999994486)\n"
     ]
    }
   ],
   "source": [
    "#求偏导数,其中一个数不动，对另外一个变量求导\n",
    "def g(x1, x2):\n",
    "    return (x1 + 5) * (x2 ** 2)\n",
    "\n",
    "def approximate_gradient(g, x1, x2, eps=1e-3):\n",
    "    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)\n",
    "    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)\n",
    "    return dg_x1, dg_x2\n",
    "\n",
    "print(approximate_gradient(g, 2., 3.))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## torch 近似求导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([4.])\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "x1 = torch.tensor([1.], requires_grad=True) # 创建一个标量tensor，并设置requires_grad=True以便进行自动求导\n",
    "x2 = torch.tensor([2.], requires_grad=True)\n",
    "y = g(x1, x2) #前向计算是 构建计算图的过程\n",
    "    \n",
    "(dy_dx1,) = torch.autograd.grad(y, x1,retain_graph=True)\n",
    "print(dy_dx1)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([4.])\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    (dy_dx1,) = torch.autograd.grad(y, x1,retain_graph=True)\n",
    "except Exception as e:\n",
    "    print(e)\n",
    "\n",
    "print(dy_dx1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([9.]) tensor([42.])\n"
     ]
    }
   ],
   "source": [
    "x1 = torch.tensor([2.], requires_grad=True)\n",
    "x2 = torch.tensor([3.], requires_grad=True)\n",
    "y = g(x1, x2)\n",
    "\n",
    "# 求偏导数\n",
    "dy_dx1, dy_dx2 = torch.autograd.grad(y, [x1, x2],retain_graph=True)\n",
    "\n",
    "\n",
    "print(dy_dx1, dy_dx2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 使用backward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([9.]) tensor([42.])\n"
     ]
    }
   ],
   "source": [
    "x1 = torch.tensor([2.], requires_grad=True)\n",
    "x2 = torch.tensor([3.], requires_grad=True)\n",
    "y = g(x1, x2) #前向计算是 构建计算图的过程\n",
    "\n",
    "# 使用backward求导\n",
    "y.backward()\n",
    "\n",
    "# 获取梯度\n",
    "print(x1.grad, x2.grad)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 理解优化器的梯度清空 optimizer.zero_grad()，和    optimizer.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(-0.3333, requires_grad=True)\n",
      "tensor(-1.3333, grad_fn=<SubBackward0>)\n"
     ]
    }
   ],
   "source": [
    "#模拟梯度下降算法 SGD\n",
    "import torch\n",
    "learning_rate = 0.3\n",
    "x = torch.tensor(2.0, requires_grad=True)\n",
    "for _ in range(100):\n",
    "    z = f(x) #前向传播\n",
    "    if _>0:\n",
    "        x.grad.zero_() # x.grad -= x.grad, x.grad = 0,梯度清零\n",
    "    z.backward()\n",
    "    x.data -= learning_rate * x.grad\n",
    "    # x.data.sub_(learning_rate * x.grad) # x -= learning_rate * x.grad，这里就等价于optimizer.step()\n",
    "   \n",
    "print(x)\n",
    "print(f(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "优化后的x值: -0.3333333432674408\n",
      "对应的函数值: -1.3333332538604736\n"
     ]
    }
   ],
   "source": [
    "# 使用PyTorch的优化器实现梯度下降\n",
    "import torch\n",
    "import torch.optim as optim\n",
    "\n",
    "# 定义初始值和学习率\n",
    "x = torch.tensor(2.0, requires_grad=True)\n",
    "learning_rate = 0.3\n",
    "\n",
    "# 创建SGD优化器\n",
    "optimizer = optim.SGD([x], lr=learning_rate)\n",
    "\n",
    "# 进行100次迭代优化\n",
    "for _ in range(100):\n",
    "    # 计算函数值（前向传播）\n",
    "    z = f(x)\n",
    "    \n",
    "    # 清空梯度\n",
    "    optimizer.zero_grad()\n",
    "    \n",
    "    # 反向传播计算梯度\n",
    "    z.backward()\n",
    "    \n",
    "    # 更新参数\n",
    "    optimizer.step()\n",
    "\n",
    "print(f\"优化后的x值: {x}\")\n",
    "print(f\"对应的函数值: {f(x)}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
